In [1]:
from __future__ import print_function
import matplotlib.pyplot as plt
from tqdm import tqdm
import mxnet as mx
from mxnet import gluon
In [2]:
data_ctx = mx.cpu()
model_ctx = mx.cpu()
In [3]:
num_inputs = 2
num_outputs = 1
num_examples = 10000
In [4]:
w1_true = 2
w2_true = -3.4
b_true = 4.2
In [5]:
# Defining an example function whose parameters we are trying to find
def real_fn(X):
    # Equation: 2 * x1 - 3.4 * x2 + 4.2
    return w1_true * X[:, 0] + w2_true * X[:, 1] + b_true
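As a quick sanity check (a hand-worked example, not part of the original notebook), evaluating real_fn on a single hand-crafted row x1 = 1, x2 = 1 should give 2 - 3.4 + 4.2 = 2.8:

# Sanity check: for x1 = 1, x2 = 1 the true function gives 2 - 3.4 + 4.2 = 2.8
print(real_fn(mx.nd.array([[1.0, 1.0]])))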
In [6]:
# Generating random X
X = mx.nd.random_normal(shape=(num_examples, num_inputs),
                        ctx=data_ctx)
In [7]:
# Generating random noise
noise = 0.1 * mx.nd.random_normal(shape=(num_examples, ), ctx=data_ctx)
In [8]:
noise.shape
Out[8]:
(10000,)
In [9]:
# Generating Y
y = real_fn(X) + noise
In [10]:
print(X[0])
print(y[0])
In [11]:
# Manually verifying y[0] (before the noise term)
print(w1_true * X[0, 0] + w2_true * X[0, 1] + b_true)
In [12]:
# Defining batch_size
batch_size = 4
In [13]:
# Creating a data iterator
train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y),
                                   batch_size=batch_size,
                                   shuffle=True)
In [14]:
# Getting a single batch
for i, (data, label) in enumerate(train_data):
    print(data, label)
    break
In [15]:
# When shuffle=True, each time the batch will be different
for i, (data, label) in enumerate(train_data):
    print(data, label)
    break
In [16]:
# 10,000 examples batched 4 at a time yields 2,500 batches
for i, (data, label) in enumerate(train_data):
    pass
print(i + 1)
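Alternatively, the DataLoader should report the same count directly (a small aside, assuming the installed MXNet version implements len() for gluon.data.DataLoader):

# The DataLoader itself knows how many batches it will yield
print(len(train_data))  # expected: 2500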
In [17]:
w = mx.nd.random_normal(shape=(num_inputs, num_outputs),
                        ctx=model_ctx)
b = mx.nd.random_normal(shape=num_outputs,
                        ctx=model_ctx)
params = [w, b]
In [18]:
w.shape
Out[18]:
(2, 1)
In [19]:
b.shape
Out[19]:
(1,)
In [20]:
# Attaching gradients
for param in params:
    param.attach_grad()
In [21]:
# Defining network
def net(X):
    return mx.nd.dot(X, w) + b
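The dot product of a (batch_size, 2) batch with the (2, 1) weight matrix gives a (batch_size, 1) output, to which the bias is broadcast. A quick shape check (an added sketch, reusing the data batch left over from the earlier loop):

# net maps (batch_size, num_inputs) -> (batch_size, num_outputs)
print(net(data).shape)  # expected: (4, 1)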
In [22]:
# Defining the loss function
def square_loss(yhat, y):
    return mx.nd.mean((yhat - y) ** 2)
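This is the mean squared error over the batch: for predictions [1, 2] against targets [0, 0] it should return (1 + 4) / 2 = 2.5 (a hand-worked check, not from the original notebook):

# Hand-worked check: ((1 - 0)^2 + (2 - 0)^2) / 2 = 2.5
print(square_loss(mx.nd.array([1.0, 2.0]), mx.nd.array([0.0, 0.0])))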
In [23]:
# Defining Stochastic Gradient Descent
def SGD(params, lr):
    for param in params:
        param[:] = param - lr * param.grad
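The in-place assignment param[:] = ... keeps the NDArray (and its attached gradient buffer) intact instead of rebinding the name. A minimal end-to-end illustration on a single toy parameter (an added sketch, independent of the model above):

# Toy example: one SGD step on f(p) = p^2, whose gradient is 2p
p = mx.nd.array([3.0])
p.attach_grad()
with mx.autograd.record():
    f = p ** 2
f.backward()
SGD([p], lr=0.1)
print(p)  # expected: 3.0 - 0.1 * 6.0 = 2.4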
In [24]:
# Defining training parameters
epochs = 10
learning_rate = .0001
In [25]:
num_batches = num_examples / batch_size
num_batches
Out[25]:
In [26]:
for e in range(epochs):
    cumulative_loss = 0
    # Batch training
    for i, (data, label) in tqdm(enumerate(train_data), ascii=True):
        data = data.as_in_context(model_ctx)
        label = label.as_in_context(model_ctx).reshape((-1, 1))
        with mx.autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        loss.backward()
        # Applying the SGD parameter update
        SGD(params, learning_rate)
        cumulative_loss += loss.asscalar()
    print('Epoch: {}'.format(e))
    print(cumulative_loss / num_batches)
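matplotlib was imported at the top but never used; a natural follow-up is to plot the learning curve. The sketch below assumes a hypothetical epoch_losses list to which cumulative_loss / num_batches is appended at the end of each epoch in the loop above:

# Sketch: assumes epoch_losses was collected inside the training loop above,
# e.g. epoch_losses.append(cumulative_loss / num_batches) after each epoch (hypothetical list)
plt.plot(range(epochs), epoch_losses)
plt.xlabel('Epoch')
plt.ylabel('Mean square loss')
plt.show()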
In [27]:
print('True values:')
print(w1_true)
print(w2_true)
print(b_true)
In [28]:
w1_predicted = params[0][0]
w2_predicted = params[0][1]
b1_predicted = params[1][0]
In [29]:
print('Predicted values:')
print(w1_predicted.asscalar())
print(w2_predicted.asscalar())
print(b1_predicted.asscalar())